import scrapy
from zongheng.items import ZonghengItem
from urllib import parse

class ZonghengSpiderSpider(scrapy.Spider):
    """Spider that walks a book's chapter list and yields one item per chapter.

    ``parse`` reads the chapter-list page in ``start_urls`` and schedules a
    request for every chapter link; ``parseText`` extracts the title and body
    text from each chapter page and yields a populated ``ZonghengItem``.
    """

    name = "zongheng_spider"
    allowed_domains = ["book.zongheng.com"]
    # URL of the chapter-list page for the book to crawl.
    url = 'http://book.zongheng.com/showchapter/325639.html'
    offset = 1
    start_urls = [url]

    def parse(self, response):
        """Schedule one request per chapter link found on the list page.

        Args:
            response: The downloaded chapter-list page.

        Yields:
            scrapy.Request: A request for each chapter page, handled by
            ``parseText``, carrying an empty ``ZonghengItem`` in ``meta``.
        """
        detailUrls = response.xpath(
            '//ul[@class="chapter-list clearfix"]/li/a/@href'
        ).extract()
        print(len(detailUrls))
        for detailUrl in detailUrls:
            print("爬取内容:{0}".format(detailUrl))
            item = ZonghengItem()
            # Resolve against the page URL: scrapy.Request rejects relative
            # or scheme-less hrefs, while absolute hrefs pass through as-is.
            yield scrapy.Request(
                response.urljoin(detailUrl),
                callback=self.parseText,
                meta={"item": item},
            )

    def parseText(self, response):
        """Extract the chapter title and body text from a chapter page.

        Args:
            response: The downloaded chapter page. ``response.meta['item']``
                is used as the item to fill when present; otherwise a new
                ``ZonghengItem`` is created.

        Yields:
            ZonghengItem: The item with ``title`` and ``content`` set. Nothing
            is yielded when the page has no title node.
        """
        title = response.xpath(
            '//div[@class="title_txtbox"]/text()'
        ).extract_first()
        if title is None:
            # The page layout did not match; skip it instead of failing with
            # an IndexError on an empty result list.
            self.logger.warning("No chapter title found on %s", response.url)
            return

        # NOTE(review): "acticleBody" looks like a misspelling of
        # "articleBody" -- confirm against the site's actual markup.
        paragraphs = response.xpath(
            '//div[@itemprop="acticleBody"]/p/text()'
        ).extract()
        content = "\n".join(paragraphs).strip('\r\n')
        # Normalise full-width and non-breaking spaces to plain spaces.
        content = content.replace(u'\u3000', u' ').replace(u'\xa0', u' ')

        item = response.meta.get('item')
        if item is None:
            # An empty Item is falsy, so test for None explicitly.
            item = ZonghengItem()
        item['title'] = title
        item['content'] = content
        yield item

    


        
